# coding:utf8
# author：GXR

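# 1. Scrape Sina Weibo: http://weibo.com/login.php?url=http%3A%2F%2Fweibo.com%2Fu%2F6309963572%2Fhome (8 points)
# 2. Scrape information about the users who reposted, commented on, and liked posts (8 points)
# 3. Scrape these attributes: nickname, location, gender, birthday, bio, registration date (8 points)
# 4. Store these attributes in MySQL table A (8 points)
# 5. Scrape the following count, follower count, and follower info (nickname, location, gender, birthday) (8 points)
# 6. Store these attributes in a second MySQL table B (8 points)
# 7. Link tables A and B with a primary-key / foreign-key relationship (8 points)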

import os
import re
import sys

import requests

# Browser-like User-Agent sent with every request.
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.96 Safari/537.36'

# NOTE(security): this is a session cookie committed to source control. It is
# kept only as a backward-compatible fallback; prefer supplying a fresh value
# through the WEIBO_COOKIE environment variable and remove this literal once
# callers have migrated.
DEFAULT_COOKIE = 'login_sid_t=9664336db64f6b91fa8860edd641b57b; cross_origin_proto=SSL; Ugrow-G0=8751d9166f7676afdce9885c6d31cd61; TC-V5-G0=8518b479055542524f4cf5907e498469; _s_tentry=-; Apache=8816408248902.625.1550709269361; SINAGLOBAL=8816408248902.625.1550709269361; ULV=1550709269368:1:1:1:8816408248902.625.1550709269361:; TC-Page-G0=8dc78264df14e433a87ecb460ff08bfe; YF-V5-G0=b8115b96b42d4782ab3a2201c5eba25d; wb_view_log=1536*8641.25; WBtopGlobal_register_version=ae9a9ec008078a68; ALF=1582245578; SSOLoginState=1550709579; SUB=_2A25xaYMbDeRhGeBP71oY-C7KyjmIHXVSHvPTrDV8PUNbmtBeLWTzkW9NRVkfFCr9ggeBUcnuxgORVFcUcwpm6F2l; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WWgoRThQJ7FYWJnYKI.nb4i5JpX5KzhUgL.FoqpShn41h5ceK-2dJLoIpLxUgLLUFfGi--fiKnfiKnRi--NiK.XiKLs; SUHB=0sqxpzNCOLK4f9; YF-Page-G0=8fee13afa53da91ff99fc89cc7829b07; wvr=6; wb_view_log_6148980615=1536*8641.25; UOR=,,login.sina.com.cn'


def build_headers(cookie=None):
    """Return the HTTP request headers used for the Weibo requests.

    Args:
        cookie: Value for the ``Cookie`` header. When ``None``, the
            ``WEIBO_COOKIE`` environment variable is used if it is set and
            non-empty; otherwise ``DEFAULT_COOKIE`` is used.

    Returns:
        A new dict with the ``User-Agent`` and ``Cookie`` headers.
    """
    if cookie is None:
        # An empty environment value is treated the same as an unset one.
        cookie = os.environ.get('WEIBO_COOKIE') or DEFAULT_COOKIE
    return {
        'User-Agent': USER_AGENT,
        'Cookie': cookie,
    }


headers = build_headers()
# Address of the Weibo profile page that is scraped.
PROFILE_URL = 'https://weibo.com/u/6309963572?is_all=1'

# Seconds to wait for the server; requests waits indefinitely without one.
REQUEST_TIMEOUT = 10

# All three patterns match backslash-escaped markup (literal \" and \/) in the
# response text -- presumably because the page ships its HTML inside script
# string literals; confirm against a live response.
NICKNAME_RE = re.compile(r'user_name\\">(.*?)<', re.S)
COUNTER_ROW_RE = re.compile(
    r'<ul class=\\"WB_row_line WB_row_r4 clearfix S_line2\\">(.*?)<\\/ul>',
    re.S,
)
COUNTER_RE = re.compile(r'<\\/em><em>(.*?)<\\/em><\\/span>', re.S)


def extract_nickname(page_text):
    """Return the first nickname found in *page_text*, or None if absent."""
    found = NICKNAME_RE.search(page_text)
    return found.group(1) if found else None


def extract_counter_rows(page_text):
    """Return the counter texts of every action row in *page_text*.

    Each element of the returned list corresponds to one
    ``WB_row_line WB_row_r4`` list in the page and holds the raw captured
    strings in page order. Rows are returned as found, whatever their length,
    so the caller decides how to treat unexpected shapes.
    """
    return [
        COUNTER_RE.findall(row_html)
        for row_html in COUNTER_ROW_RE.findall(page_text)
    ]


def main(url=PROFILE_URL):
    """Fetch the profile page and print the nickname and per-post counters.

    Args:
        url: Address of the profile page to download.

    Raises:
        requests.HTTPError: The server answered with an error status.
        SystemExit: No nickname could be found in the response.
    """
    # The context manager releases the session's connections even on error.
    with requests.Session() as session:
        response = session.get(
            url=url, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        page_text = response.text

    nickname = extract_nickname(page_text)
    if nickname is None:
        # Exit with a readable message rather than an opaque IndexError.
        # NOTE(review): likely cause is an expired cookie -- confirm.
        raise SystemExit(
            'nickname not found in response; the session cookie may be invalid')
    print(nickname)

    for counters in extract_counter_rows(page_text):
        if len(counters) != 4:
            # Report the odd row and continue instead of aborting the run.
            print('skipping row with unexpected counters: %r' % (counters,),
                  file=sys.stderr)
            continue
        # Presumably favorites, reposts, comments and likes, in page order.
        favorites, reposts, comments, likes = counters
        print(favorites, reposts, comments, likes)


if __name__ == '__main__':
    main()
